package com.simple.database.pageprocess;  
  
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
  
/**
 * Downloads the images referenced by {@code <img>} tags of a web page.
 *
 * <p>Workflow: fetch the page HTML, collect its {@code <img>} tags, extract the absolute
 * http/https URLs found inside those tags, then save each one into {@code IMAGE_DIR}.
 * Relative and protocol-relative image URLs are not resolved and are therefore skipped.
 *
 * @author swinglife
 */
public class CatchImage {

    // Page that is scanned for images.
    private static final String URL = "http://wmtp.net/6965";
    // Charset used to decode the downloaded page.
    private static final String ECODING = "UTF-8";
    // Directory the downloaded images are written to.
    private static final String IMAGE_DIR = "F:/temp/images";
    // Connect/read timeout so that a stalled server cannot hang the program forever.
    private static final int TIMEOUT_MS = 15000;
    private static final int BUFFER_SIZE = 8192;

    // One <img ...> tag that carries a src attribute. "[^>]*" keeps the match inside a
    // single tag (and, unlike ".", also spans line breaks inside the tag).
    private static final Pattern IMG_TAG_PATTERN =
            Pattern.compile("<img\\b[^>]*?\\bsrc\\s*=[^>]*>", Pattern.CASE_INSENSITIVE);
    // An absolute http/https URL, ending at the first quote, '>' or whitespace.
    private static final Pattern IMG_SRC_PATTERN =
            Pattern.compile("https?://[^\"'>\\s]+", Pattern.CASE_INSENSITIVE);
    // Characters that must not reach the file system as part of a file name.
    private static final Pattern UNSAFE_NAME_CHARS = Pattern.compile("[\\\\/:*?\"<>|]");

    /**
     * Fetches the configured page and downloads every image it references.
     * Any failure is reported on the console via its stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) throws Exception {
        try {
            CatchImage cm = new CatchImage();
            String html = cm.getHTML(URL);
            List<String> imgUrl = cm.getImageUrl(html);
            System.out.println("imgUrl =" + imgUrl);
            List<String> imgSrc = cm.getImageSrc(imgUrl);
            System.out.println("imgSrc =" + imgSrc);
            cm.download(imgSrc);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Downloads a resource and decodes it as text.
     *
     * <p>All bytes are collected before decoding, so multi-byte characters that straddle
     * two reads are decoded correctly and no stale buffer content ends up in the result.
     *
     * @param url address of the page
     * @return the page content decoded with {@code ECODING}
     * @throws Exception if the URL is malformed or the content cannot be read
     */
    private String getHTML(String url) throws Exception {
        InputStream in = open(url);
        try {
            ByteArrayOutputStream page = new ByteArrayOutputStream();
            copy(in, page);
            return page.toString(ECODING);
        } finally {
            in.close();
        }
    }

    /**
     * Collects the {@code <img>} tags that have a {@code src} attribute.
     *
     * @param html markup to scan
     * @return the full text of each matching tag, in document order
     */
    private List<String> getImageUrl(String html) {
        List<String> listImgUrl = new ArrayList<String>();
        Matcher matcher = IMG_TAG_PATTERN.matcher(html);
        while (matcher.find()) {
            listImgUrl.add(matcher.group());
        }
        return listImgUrl;
    }

    /**
     * Extracts the absolute http/https URLs contained in the given tags.
     *
     * @param listImageUrl tag texts as returned by {@link #getImageUrl(String)}
     * @return every URL found, without surrounding quotes or whitespace
     */
    private List<String> getImageSrc(List<String> listImageUrl) {
        List<String> listImgSrc = new ArrayList<String>();
        for (String image : listImageUrl) {
            Matcher matcher = IMG_SRC_PATTERN.matcher(image);
            while (matcher.find()) {
                listImgSrc.add(matcher.group());
            }
        }
        return listImgSrc;
    }

    /**
     * Saves every image into {@code IMAGE_DIR}, creating the directory if necessary.
     * A failure is reported and affects only the image concerned; the remaining
     * images are still downloaded.
     *
     * @param listImgSrc absolute image URLs
     */
    private void download(List<String> listImgSrc) {
        File dir = new File(IMAGE_DIR);
        if (!dir.isDirectory() && !dir.mkdirs()) {
            System.out.println("下载失败: " + dir);
            return;
        }
        for (String url : listImgSrc) {
            String imageName = fileNameOf(url);
            if (imageName.isEmpty()) {
                // Nothing usable after the last '/', e.g. "http://host/".
                System.out.println("下载失败: " + url);
                continue;
            }
            try {
                System.out.println("开始下载:" + url);
                save(url, new File(dir, imageName));
                System.out.println(imageName + "下载完成");
            } catch (Exception e) {
                // Best effort: report the failing URL and carry on with the next one.
                System.out.println("下载失败: " + url + " (" + e + ")");
            }
        }
    }

    /**
     * Derives a safe local file name from the last path segment of a URL.
     * Query string and fragment are dropped and characters that are illegal or
     * dangerous in file names are replaced by an underscore.
     *
     * @return the file name, or an empty string if the URL yields no usable name
     */
    private static String fileNameOf(String url) {
        int end = url.length();
        int query = url.indexOf('?');
        if (query >= 0) {
            end = query;
        }
        int fragment = url.indexOf('#');
        if (fragment >= 0 && fragment < end) {
            end = fragment;
        }
        String path = url.substring(0, end);
        String name = path.substring(path.lastIndexOf('/') + 1);
        name = UNSAFE_NAME_CHARS.matcher(name).replaceAll("_");
        // "." and ".." would address a directory instead of a file inside IMAGE_DIR.
        if (name.equals(".") || name.equals("..")) {
            return "";
        }
        return name;
    }

    /** Streams the resource behind {@code url} into {@code target}, closing both streams. */
    private static void save(String url, File target) throws IOException {
        InputStream in = open(url);
        try {
            FileOutputStream out = new FileOutputStream(target);
            try {
                copy(in, out);
            } finally {
                out.close();
            }
        } finally {
            in.close();
        }
    }

    /** Opens {@code url} for reading with connect and read timeouts applied. */
    private static InputStream open(String url) throws IOException {
        URLConnection connection = new URL(url).openConnection();
        connection.setConnectTimeout(TIMEOUT_MS);
        connection.setReadTimeout(TIMEOUT_MS);
        return connection.getInputStream();
    }

    /** Copies {@code in} to {@code out} until end of stream; closes neither stream. */
    private static void copy(InputStream in, OutputStream out) throws IOException {
        byte[] buf = new byte[BUFFER_SIZE];
        int length;
        while ((length = in.read(buf)) != -1) {
            out.write(buf, 0, length);
        }
    }
}